---
title: "Descriptive analysis"
author: "Jae Yeon Kim"
date: "`r Sys.Date()`"
output:
  html_document:
    df_print: paged
---

```{r}
if (!require(pacman)) install.packages("pacman")

pacman::p_load(tidyverse, # tidyverse 
               plm, # panel data analysis
               pglm, # generalized panel data analysis
               clubSandwich, # robust SEs
               here, # computational reproducibility 
               glue, # gluing objects and strings 
               tidylog, # logging analysis
               naniar, # missing data 
               zeallot, # multiple assignments 
               readxl, 
               ggpubr,
               hrbrthemes, 
               wesanderson,
               broom, 
               jtools, 
               patchwork, 
               broom.mixed, 
               estimatr, 
               stargazer,
               DeclareDesign,
               sensemakr,
               mice,
               usmap,
               janitor,
               modelsummary, 
               flextable,
               officer,
               extrafont,
               gt,
               bootstrap,
               lme4,
               clipr)

# Load the project's helper scripts (utilities first, then the plotting theme)
invisible(lapply(
  c("functions/utils.r", "functions/theme.R"),
  function(script) source(here(script))
))

# Black-and-white ggplot2 theme as the default for every plot
ggplot2::theme_set(theme_bw())

# Print numbers in fixed notation rather than scientific notation
options(scipen = 999)
```

# Load files 

```{r message = FALSE}
# Read the recoded panel data and discard its first two columns
df <- here("processed_data", "panel_data_recoded.csv") %>%
  read_csv() %>%
  .[, -(1:2)]

# Separate copy holding only the wave 2 observations
df2 <- subset(df, wave == 2)
```

# Regression analysis

## Likely to vote 

```{r}
# Rename the DV: the raw column name starts with a digit and needs backticks,
# so give it a syntactic name for use in model formulas
df$likely_vote <- df$`2020likelyvote`

# Rescale the discrimination perception variables by shifting them up by 0.5.
# NOTE: this modifies `df` in place, so re-running this chunk without
# reloading the data shifts the variables a second time.
df$apa.discrim.rona <- df$apa.discrim.rona + .5
df$gendiscrim <- df$gendiscrim + .5

# Create a proxy variable: each respondent's wave 1 general discrimination
# score (already shifted above), attached to every row of that respondent.
# Matching on `pid` keeps the value aligned with the right respondent whatever
# the row order; respondents with no wave 1 record get NA.
wave1 <- subset(df, wave == 1)
stopifnot(anyDuplicated(wave1$pid) == 0) # one wave 1 row per respondent
df$proxy <- wave1$gendiscrim[match(df$pid, wave1$pid)]

## check the proxy data distribution
df$proxy %>% table()

# Group respondents by their wave 1 score. Building the column in one step
# avoids reading the not-yet-existing `prior` column, and near() compares the
# midpoint with a floating-point tolerance. Missing proxies stay NA.
df$prior <- dplyr::case_when(
  dplyr::near(df$proxy, 0.5) ~ "Middle",
  df$proxy < 0.5 ~ "Low",
  df$proxy > 0.5 ~ "High"
)

# Observed bounds of the proxy
min(df$proxy)
max(df$proxy)
```

Group definition (`prior`, based on `proxy`):

- Middle: proxy = 0.5
- High: 0.5 < proxy <= 1
- Low: 0 <= proxy < 0.5

```{r}
# Create factor variables

## Covariates converted to factors in place
factor_cols <- c("usborn", "male", "chinese", "indian", "GOP", "DEM", "wave")
df[factor_cols] <- lapply(df[factor_cols], factor)

## National origin goes into a new column; `natorigin` itself is left as is
df$nat_origin <- factor(df$natorigin)
```

### Table C7

```{r}
# Display labels for the regression tables, keyed by model coefficient name.
# Passed as `coef_map` to modelsummary() for Tables C7 and C8.
cm <- c(
  "apa.discrim.rona" = "COVID Discrim",
  "gendiscrim" = "General Discrim (Post COVID)",
  "usborn1" = "US Born",
  "edu" = "Education",
  "DEM1" = "Democratic Party",
  "GOP1" = "Republican Party",
  "age" = "Age",
  "male1" = "Male",
  "wave3" = "Wave3",
  "proxy" = "Wave 1 General Discrim",
  "apa.discrim.rona:proxy" = "COVID Discrim* Pre-COVID General Discrim",
  "chinese1" = "Chinese",
  "indian1" = "Indian"
)
```

```{r}
# Models of vote likelihood (`likely_vote`) for Table C7.
# Each list name becomes the model's label when the list is passed to
# modelsummary().
turnout_mod <- list(

  # Pooled OLS with the full covariate set
  "OLS" = lm(
    likely_vote ~ gendiscrim + apa.discrim.rona + usborn + edu + DEM + GOP +
      age + male + wave + nat_origin,
    data = df
  ),

  # Same specification, weighted by the `weights` column of `df`
  # (argument name spelled out instead of relying on partial matching)
  "Weighted" = lm(
    likely_vote ~ gendiscrim + apa.discrim.rona + usborn + edu + DEM + GOP +
      age + male + wave + nat_origin,
    data = df,
    weights = weights
  ),

  # Within (fixed-effects) estimator on the pid-by-wave panel
  "Within" = plm(
    likely_vote ~ gendiscrim + apa.discrim.rona + DEM + GOP + age + nat_origin,
    data = df,
    index = c("pid", "wave"),
    model = "within"
  ),

  # OLS adding the wave 1 proxy and its interaction with COVID discrimination
  "Interaction term" = lm(
    likely_vote ~ apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP +
      age + male + wave + proxy + apa.discrim.rona:proxy + nat_origin,
    data = df
  )
)
```

```{r}
# Export Table C7 as a Word document: estimates formatted with `fmt = 3`,
# p-values as the reported statistic, labels taken from `cm`
modelsummary(
  turnout_mod,
  output = here("outputs", "table_c7.docx"),
  coef_map = cm,
  coef_omit = "Intercept|nat_origin",
  statistic = "p = {p.value}",
  fmt = 3
)
```

## Candidate preference

### Table C8 

```{r}
# Models of candidate preference (`biden`) for Table C8.
# Each list name becomes the model's label when the list is passed to
# modelsummary().
biden_mod <- list(

  # Pooled logit with the full covariate set
  "Logit" = glm(
    biden ~ gendiscrim + apa.discrim.rona + usborn + edu + DEM + GOP +
      age + male + wave + nat_origin,
    data = df,
    family = binomial
  ),

  # Same specification, weighted by the `weights` column of `df`
  # (argument name spelled out instead of relying on partial matching)
  "Weighted" = glm(
    biden ~ gendiscrim + apa.discrim.rona + usborn + edu + DEM + GOP +
      age + male + wave + nat_origin,
    data = df,
    family = binomial,
    weights = weights
  ),

  # Within (fixed-effects) estimator on the pid-by-wave panel.
  # NOTE(review): plm() estimates linear panel models and has no `family`
  # argument, so the `family = binomial` previously passed here is dropped;
  # this column is a linear probability model, not a logit. Confirm that this
  # is the intended specification for the published table.
  "Within" = plm(
    biden ~ gendiscrim + apa.discrim.rona + DEM + GOP + age + nat_origin + wave,
    data = df,
    index = c("pid", "wave"),
    model = "within"
  ),

  # Logit adding the wave 1 proxy and its interaction with COVID discrimination
  "Interaction" = glm(
    biden ~ apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP +
      age + male + wave + proxy + apa.discrim.rona:proxy + nat_origin,
    data = df,
    family = binomial
  )
)
```

```{r}
# Export Table C8 as a Word document: estimates formatted with `fmt = 3`,
# p-values as the reported statistic, labels taken from `cm`
modelsummary(
  biden_mod,
  output = here("outputs", "table_c8.docx"),
  coef_map = cm,
  coef_omit = "Intercept|nat_origin",
  statistic = "p = {p.value}",
  fmt = 3
)
```